tot.effect = function(fit, regex) {
  # This function takes a fitted model and a regular expression for a group
  # of coefficients. It computes the sum of those coefficients and tests whether
  # the sum is statistically different from zero, using an F test and HAC standard errors
  eff = sum(coef(fit)[grep(regex, names(coef(fit)))])
  sum.test.vars = grep(regex, names(coef(fit)), value=TRUE)
  sum.test.vars = paste(sum.test.vars, collapse=' + ')
  library(car)
  library(sandwich)
  p.F = linearHypothesis(fit, sum.test.vars %&% ' = 0', test='F', vcov.=NeweyWest)$"Pr(>F)"[2]
  
  V = NeweyWest(fit)
  V = V[grep(regex,rownames(V)),grep(regex,rownames(V))]
  # Var(a+b+c)=V(a)+V(b)+V(c)+2(Cov(a,b)+Cov(b,c)+Cov(a,c)) = sum of elements of Vcov(a,b,c) matrix
  tot.se = sqrt(sum(V))
  dfs = length(fit$residuals) - length(coef(fit))
  p.T = 2*pt(abs(eff)/tot.se, df=dfs, lower.tail=FALSE)
  # p.F should = p.T if we did it right
  return(c(eff=eff, p.F=p.F, se=tot.se, p.T=p.T))
}

#### Setup and run spud count regressions ----
case = 'aggregate'
# case = 'nogas'
# case = 'below.median.gas'
# case = 'above.median.gas'
subfolder = '/Drilling Elasticity Regressions - Pooled/'

# Create output folder to store results
folder.out = output.dir%&%subfolder
dir.create(folder.out%&%'/Tables/', recursive = TRUE)
dir.create(folder.out%&%'/Figures/', recursive = TRUE)

## Choose which spudcounts data to use in regressions
spudcounts.reg = copy(spudcounts)

if (case=='nogas') {
  # Testing regressions on wells with no gas
  spudcounts.reg[, N:=NULL] # replace with "no gas" counts
  spudcounts.reg[, d.ln.N:=NULL] # replace with "no gas" counts
  # Merge in N and d.ln.N from spudcounts.nogas
  spudcounts.reg = merge(spudcounts.nogas, spudcounts.reg, by=c('prod_type','fed.land','offshore','spud_month'))
}

# Pool all offshore wells together.
spudcounts.offshore = spudcounts.reg[, .(N=sum(N)), by=.(spud_month, offshore)]
spudcounts.offshore[, d.ln.N := c(NA, diff(log(N+1))), by=offshore]
spudcounts.offshore = merge(spudcounts.reg[!duplicated(spud_month), .(spud_month, d.ln.wti, d.ln.hh, hh, moy)],
                       spudcounts.offshore, by=c('spud_month'))
par(mfrow=c(2,2))
plot(N~spud_month, data=spudcounts.offshore[offshore==0], type='l', lwd=2, col=1, main='Onshore, All Wells')
plot(N~spud_month, data=spudcounts.offshore[offshore==1], type='l', lwd=2, col=1, main='Offshore, All Wells')

spudcounts.offshore = spudcounts.offshore[offshore==1]

# Onshore, pool fed/nonfed, but keep oil and gas separate
if (case=='aggregate' | case=='nogas') spudcounts.onshore = spudcounts.reg[offshore==0, .(N=sum(N)), by=.(spud_month, offshore, prod_type)]
if (case=='above.median.gas') spudcounts.onshore = spudcounts.reg[offshore==0, .(N=sum(N.highgas)), by=.(spud_month, offshore, prod_type)]
if (case=='below.median.gas') spudcounts.onshore = spudcounts.reg[offshore==0, .(N=sum(N.lowgas)), by=.(spud_month, offshore, prod_type)]
spudcounts.onshore[, d.ln.N := c(NA, diff(log(N+1))), by=.(offshore, prod_type)]
spudcounts.onshore = merge(spudcounts.reg[!duplicated(spud_month), .(spud_month, d.ln.wti, d.ln.hh, hh, moy)],
                           spudcounts.onshore, by=c('spud_month'))
plot(N~spud_month, data=spudcounts.onshore[prod_type=='Oil'], type='l', lwd=2, col=1, main='Onshore, Oil Wells')
plot(N~spud_month, data=spudcounts.onshore[prod_type=='Gas'], type='l', lwd=2, col=1, main='Onshore, Gas Wells')

cols = c('#74645E','#88C4F4')
cols[2] = colorspace::lighten(cols[2], 0.5)
pdf(folder.out%&%'/Figures/Spudcounts_Aggregated_'%&%today()%&%'.pdf', family='serif', width=11, height=8.5)#,colormode='gray')
par(mai=c(0.8, 0.9, 0.5, 1.1))
plot(N ~ spud_month, data=spudcounts.onshore[prod_type=='Oil'], type='l', lty=1, lwd=3, col=cols[1], #xlim=c(as.Date('1990-01-01'),as.Date('2022-01-01')),
     xlab='Month', ylab='Number of Wells Drilled', cex.axis=1.5, cex.lab=1.5, main='', ylim=c(0, 3000))
grid(nx=NA, ny=NULL)
abline(h=0)
lines(N ~ spud_month, data=spudcounts.onshore[prod_type=='Gas'], type='l', lty=1, lwd=3, col=cols[2])
lines(N ~ spud_month, data=spudcounts.offshore, type='l', lty=1, lwd=3, col=colorspace::darken(my.cols[2], 0.5))
par(new=TRUE)
plot(wti ~ date, data=prices[date %in% spudcounts.offshore[, spud_month]], type='l', lwd=3, col='black', #xlim=c(as.Date('1990-01-01'),as.Date('2022-01-01')),
     xlab='', main='', xaxt='n', yaxt='n', ylim=c(0, 200), ylab='', lty=5)
lines(hh*6 ~ date, data=prices[date %in% spudcounts.offshore[, spud_month]], type='l', lty=5, lwd=3, col=colorspace::lighten('blue', 0.5))
axis(4, cex.axis=1.5)
corners = par('usr')
par(xpd=TRUE)
text(x = corners[2]+1000, y = mean(corners[3:4]), 
     'Oil and Gas Price \n (2020$ per barrel of oil equivalent)', cex=1.5, 
     srt = 270)
legend('topleft',legend=c('Onshore Oil Wells', 
                          'Onshore Gas Wells',
                          'Offshore Wells'),
       col=c(cols, colorspace::darken(my.cols[2], 0.5)), lwd=3, lty=1, bty='n', cex=1.5)
legend('topright',legend=c('WTI Oil Price', 'Henry Hub Gas Price'), col=c('black',colorspace::lighten('blue', 0.5)),
       lty=5, lwd=3, bty='n', cex=1.5)
dev.off()

library(urca)
# Test for unit root in each drilling time series
# Can't reject the null of a unit root for oil, gas, or combined
summary(ur.df(spudcounts.onshore[prod_type=='Oil', log(N+1)]))
summary(ur.df(spudcounts.onshore[prod_type=='Gas', log(N+1)]))
summary(ur.df(spudcounts.offshore[, log(N+1)]))

### Drilling Regressions
library(lmtest)
library(sandwich)
lag.length = 12
lag.form = c('shift(d.ln.wti, '%&%1:lag.length%&%')', 'shift(d.ln.hh, '%&%1:lag.length%&%')')
lag.form = paste(lag.form, collapse = ' + ')
lag.form = lag.form %&%' + as.factor(moy)' # use MOY FEs

# OLS
my.form = as.formula('d.ln.N ~ d.ln.wti + d.ln.hh + '%&% lag.form)
# my.form = as.formula('d.ln.N.highgas ~ d.ln.wti + d.ln.hh + '%&% lag.form)
# my.form = as.formula('d.ln.N.lowgas ~ d.ln.wti + d.ln.hh + '%&% lag.form)
lm.onshore.oil = lm(my.form, data=spudcounts.onshore[prod_type=='Oil'])
lm.onshore.gas = lm(my.form, data=spudcounts.onshore[prod_type=='Gas'])
lm.offshore = lm(my.form, data=spudcounts.offshore)
coeftest(lm.onshore.oil, vcov.=NeweyWest)

tot.effect(lm.onshore.oil, 'wti')  # (wti$)|(wti.*\\)$
tot.effect(lm.onshore.oil, 'hh')
tot.effect(lm.onshore.gas, 'wti')   
tot.effect(lm.onshore.gas, 'hh')   
tot.effect(lm.offshore, 'wti')
tot.effect(lm.offshore, 'hh')

# Compare to SUR
library(systemfit)
spudcounts.sys = rbind(spudcounts.onshore, spudcounts.offshore, fill=T)
spudcounts.sys[offshore==1, prod_type:='Offshore']
spudcounts.sys[, .N, by=prod_type]
spudcounts.sys = dcast(spudcounts.sys, spud_month ~ prod_type, value.var='d.ln.N')
spudcounts.sys = merge(spudcounts.sys, spudcounts.offshore[,.(spud_month, d.ln.wti, d.ln.hh, moy)], by='spud_month')

oil.form = as.formula('Oil ~ d.ln.wti + d.ln.hh + '%&% lag.form)
gas.form = as.formula('Gas ~ d.ln.wti + d.ln.hh + '%&% lag.form)
offshore.form = as.formula('Offshore ~ d.ln.wti + d.ln.hh + '%&% lag.form)

sur.fit = systemfit(formula = list(oil.form, gas.form, offshore.form), data=spudcounts.sys)
sur.fit.V = NeweyWest(sur.fit)

# Effectively the same:
tot.effect(sur.fit, 'eq2.*hh')
tot.effect(lm.onshore.gas, 'hh')

# Store results in lm.regs and elasts.lm
# Save resulting regressions and elasticities
lm.regs = array(list(), dim=c(2,2,2), 
                dimnames=list(c('Oil','Gas'),c('Nonfederal', 'Federal'), c('Onshore', 'Offshore')))
elasts.lm = array(NA, dim=c(2,3,2,2,2),
                  dimnames=list(c('wti','hh'), 
                                  c("eff","se","p.T"),
                             c('Oil','Gas'),
                             c('Nonfederal','Federal'),
                             c('Onshore','Offshore')))
lm.regs['Oil',,'Onshore'] = list(lm.onshore.oil, lm.onshore.oil)
lm.regs['Gas',,'Onshore'] = list(lm.onshore.gas, lm.onshore.gas)
lm.regs[,,'Offshore'] = list(lm.offshore, lm.offshore, lm.offshore, lm.offshore)

elasts.lm['wti',,'Oil',,'Onshore'] = tot.effect(lm.onshore.oil, 'wti')[-2]
elasts.lm['hh',,'Oil',,'Onshore'] = tot.effect(lm.onshore.oil, 'hh')[-2]
elasts.lm['wti',,'Gas',,'Onshore'] = tot.effect(lm.onshore.gas, 'wti')[-2]
elasts.lm['hh',,'Gas',,'Onshore'] = tot.effect(lm.onshore.gas, 'hh')[-2]
elasts.lm['wti',,,,'Offshore'] = tot.effect(lm.offshore, 'wti')[-2]
elasts.lm['hh',,,,'Offshore'] = tot.effect(lm.offshore, 'hh')[-2]

# Write out regression results
spudreg.tex = function(lm.temp, wt, rd=3) {
  x.sum = coeftest(lm.temp, vcov.=NeweyWest)
  coefs = x.sum[,-3] # drop t stat
  coefs = cbind(coefs[grep('wti', rownames(coefs)), ],
                0,  
                coefs[grep('hh',  rownames(coefs)), ])
  
  TE.wti = tot.effect(lm.temp, 'wti')[c(1,3,2)]
  # TE.wti = c(TE.wti[1:2], TE.wti[1]/TE.wti[2], TE.wti[3])
  TE.hh = tot.effect(lm.temp, 'hh')[c(1,3,2)]
  # TE.hh = c(TE.hh[1:2], TE.hh[1]/TE.hh[2], TE.hh[3])
  
  coefs = rbind(coefs, c(TE.wti, 0, TE.hh))
  
  rownames(coefs) = c('Current Price', '1 Lag', 2:12%&%' Lags', 'Cumulative')
  coefs = round(coefs, rd)
  
  coefs[,4] = gtools::stars.pval(coefs[,3])
  coefs = cbind(coefs, gtools::stars.pval(as.numeric(coefs[,7])))
  coefs = rbind(coefs, 
                c(round(length(lm.temp$residuals), rd), rep('',ncol(coefs)-1)),
                c(round(summary(lm.temp)$r.squared, rd), rep('',ncol(coefs)-1)))
  coefs[coefs=='0'] = '<0.001'
  
  rownames(coefs)[(nrow(coefs)-1):nrow(coefs)] = c('Number of Observations (Months)','R-Squared')
  
  file.nm = paste0('Spudregs_', wt)
  
  print(xtable::xtable(coefs), 
        file=folder.out%&%'/Tables/'%&%file.nm%&%'_'%&%today()%&%'.tex', include.rownames=T)
  
}
# debugonce(spudreg.tex)
spudreg.tex(lm.onshore.oil, wt='Onshore_Oil')
spudreg.tex(lm.onshore.gas, wt='Onshore_Gas')
spudreg.tex(lm.offshore, wt='Offshore')

elasts.plot = c(elasts.lm[,'eff','Oil','Federal','Onshore'],
                0,
                elasts.lm[,'eff','Gas','Federal','Onshore'],
                0,
                elasts.lm[,'eff','Oil','Federal','Offshore'])

elasts.se.plot = c(elasts.lm[,'se','Oil','Federal','Onshore'],
                elasts.lm[,'se','Gas','Federal','Onshore'],
                elasts.lm[,'se','Oil','Federal','Offshore'])

# Plot these. A pair of clustered bars for Onshore Oil, another pair for Onshore Gas, another pair for Offshore.
dev.off()
file.nm = 'Cum_elast_results_pooled_'%&%today()%&%'.pdf'
pdf(folder.out%&%'/Figures/'%&%file.nm, family='serif', width=11, height=8.5)
barplot(elasts.plot, names.arg = rep('', 8),
        ylim=c(-0.5,2), col = rep(c('#74645E','#88C4F4',0), times=3)[-9], space=0, ylab='Cumulative Drilling Elasticity', #main=og%&%' Wells',
        cex.lab=1.5, cex.axis=1.5, cex.main=1.5, cex.name=1.5)
abline(v=c(2.5,5.5))
abline(h=seq(0,2, by=0.5), col='lightgray', lty=3)
abline(h=0)
barplot(elasts.plot, add=T, names.arg = rep(c('To\nOil\nPrices', 'To\nGas\nPrices',''), times=3)[-9], cex.names = 1.5,
        col = rep(c('#74645E','#88C4F4',0), times=3)[-9], space=0,
        cex.lab=1.5, cex.axis=1.5, cex.main=1.5)
t.crit = qnorm(0.95) # 90% CI
arrows(x0= which(elasts.plot!=0) - 0.5, x1 = which(elasts.plot!=0) - 0.5, 
       y0=elasts.plot[-c(3,6)] - t.crit*elasts.se.plot,
       y1=elasts.plot[-c(3,6)] + t.crit*elasts.se.plot,
       code=3, angle=90, length=0.1)
par(xpd=T)
text(x=1, y=2, labels='Onshore Oil Wells', pos=3, cex=1.5)
text(x=4, y=2, labels='Onshore Gas Wells', pos=3, cex=1.5)
text(x=7, y=2, labels='Offshore Wells', pos=3, cex=1.5)
dev.off()

length(residuals(lm.regs[[1]]))
spudcounts[complete.cases(d.ln.hh), unique(spud_month)] # 337 months with complete gas price data (Jan 1991 - Feb 2019)
elapsed_months('2019-03-01', '1991-02-02') # 337 months, including Feb 2019
nrow(model.frame(lm.regs[[1]])) # 325 observations, since we have 12 lags that loses the first 12 months of gas price data



